In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')
In [10]:
path =r"Z:\Taiwan-Customer defaults csv.csv"
print(path)
data = pd.read_csv(path)
print(data)
Z:\Taiwan-Customer defaults csv.csv
          ID  LIMIT_BAL  SEX  EDUCATION  MARRIAGE  AGE  PAY_0  PAY_2  PAY_3  \
0          1      20000    2          2         1   24      2      2     -1   
1          2     120000    2          2         2   26     -1      2      0   
2          3      90000    2          2         2   34      0      0      0   
3          4      50000    2          2         1   37      0      0      0   
4          5      50000    1          2         1   57     -1      0     -1   
...      ...        ...  ...        ...       ...  ...    ...    ...    ...   
29995  29996     220000    1          3         1   39      0      0      0   
29996  29997     150000    1          3         2   43     -1     -1     -1   
29997  29998      30000    1          2         2   37      4      3      2   
29998  29999      80000    1          3         1   41      1     -1      0   
29999  30000      50000    1          2         1   46      0      0      0   

       PAY_4  ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_AMT1  PAY_AMT2  \
0         -1  ...          0          0          0         0       689   
1          0  ...       3272       3455       3261         0      1000   
2          0  ...      14331      14948      15549      1518      1500   
3          0  ...      28314      28959      29547      2000      2019   
4          0  ...      20940      19146      19131      2000     36681   
...      ...  ...        ...        ...        ...       ...       ...   
29995      0  ...      88004      31237      15980      8500     20000   
29996     -1  ...       8979       5190          0      1837      3526   
29997     -1  ...      20878      20582      19357         0         0   
29998      0  ...      52774      11855      48944     85900      3409   
29999      0  ...      36535      32428      15313      2078      1800   

       PAY_AMT3  PAY_AMT4  PAY_AMT5  PAY_AMT6  default payment next month  
0             0         0         0         0                           1  
1          1000      1000         0      2000                           1  
2          1000      1000      1000      5000                           0  
3          1200      1100      1069      1000                           0  
4         10000      9000       689       679                           0  
...         ...       ...       ...       ...                         ...  
29995      5003      3047      5000      1000                           0  
29996      8998       129         0         0                           0  
29997     22000      4200      2000      3100                           1  
29998      1178      1926     52964      1804                           1  
29999      1430      1000      1000      1000                           1  

[30000 rows x 25 columns]
In [11]:
data.head()
Out[11]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_0 PAY_2 PAY_3 PAY_4 ... BILL_AMT4 BILL_AMT5 BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 default payment next month
0 1 20000 2 2 1 24 2 2 -1 -1 ... 0 0 0 0 689 0 0 0 0 1
1 2 120000 2 2 2 26 -1 2 0 0 ... 3272 3455 3261 0 1000 1000 1000 0 2000 1
2 3 90000 2 2 2 34 0 0 0 0 ... 14331 14948 15549 1518 1500 1000 1000 1000 5000 0
3 4 50000 2 2 1 37 0 0 0 0 ... 28314 28959 29547 2000 2019 1200 1100 1069 1000 0
4 5 50000 1 2 1 57 -1 0 -1 0 ... 20940 19146 19131 2000 36681 10000 9000 689 679 0

5 rows × 25 columns

In [12]:
data.tail()
Out[12]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_0 PAY_2 PAY_3 PAY_4 ... BILL_AMT4 BILL_AMT5 BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 default payment next month
29995 29996 220000 1 3 1 39 0 0 0 0 ... 88004 31237 15980 8500 20000 5003 3047 5000 1000 0
29996 29997 150000 1 3 2 43 -1 -1 -1 -1 ... 8979 5190 0 1837 3526 8998 129 0 0 0
29997 29998 30000 1 2 2 37 4 3 2 -1 ... 20878 20582 19357 0 0 22000 4200 2000 3100 1
29998 29999 80000 1 3 1 41 1 -1 0 0 ... 52774 11855 48944 85900 3409 1178 1926 52964 1804 1
29999 30000 50000 1 2 1 46 0 0 0 0 ... 36535 32428 15313 2078 1800 1430 1000 1000 1000 1

5 rows × 25 columns

In [13]:
print(data.shape)
(30000, 25)
In [14]:
print("Features of the dataset:")
data.columns
Features of the dataset:
Out[14]:
Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0',
       'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default payment next month'],
      dtype='object')
In [15]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   ID                          30000 non-null  int64
 1   LIMIT_BAL                   30000 non-null  int64
 2   SEX                         30000 non-null  int64
 3   EDUCATION                   30000 non-null  int64
 4   MARRIAGE                    30000 non-null  int64
 5   AGE                         30000 non-null  int64
 6   PAY_0                       30000 non-null  int64
 7   PAY_2                       30000 non-null  int64
 8   PAY_3                       30000 non-null  int64
 9   PAY_4                       30000 non-null  int64
 10  PAY_5                       30000 non-null  int64
 11  PAY_6                       30000 non-null  int64
 12  BILL_AMT1                   30000 non-null  int64
 13  BILL_AMT2                   30000 non-null  int64
 14  BILL_AMT3                   30000 non-null  int64
 15  BILL_AMT4                   30000 non-null  int64
 16  BILL_AMT5                   30000 non-null  int64
 17  BILL_AMT6                   30000 non-null  int64
 18  PAY_AMT1                    30000 non-null  int64
 19  PAY_AMT2                    30000 non-null  int64
 20  PAY_AMT3                    30000 non-null  int64
 21  PAY_AMT4                    30000 non-null  int64
 22  PAY_AMT5                    30000 non-null  int64
 23  PAY_AMT6                    30000 non-null  int64
 24  default payment next month  30000 non-null  int64
dtypes: int64(25)
memory usage: 5.7 MB
In [16]:
data.nunique()
Out[16]:
ID                            30000
LIMIT_BAL                        81
SEX                               2
EDUCATION                         7
MARRIAGE                          4
AGE                              56
PAY_0                            11
PAY_2                            11
PAY_3                            11
PAY_4                            11
PAY_5                            10
PAY_6                            10
BILL_AMT1                     22723
BILL_AMT2                     22346
BILL_AMT3                     22026
BILL_AMT4                     21548
BILL_AMT5                     21010
BILL_AMT6                     20604
PAY_AMT1                       7943
PAY_AMT2                       7899
PAY_AMT3                       7518
PAY_AMT4                       6937
PAY_AMT5                       6897
PAY_AMT6                       6939
default payment next month        2
dtype: int64
In [17]:
data.describe().T
Out[17]:
count mean std min 25% 50% 75% max
ID 30000.0 15000.500000 8660.398374 1.0 7500.75 15000.5 22500.25 30000.0
LIMIT_BAL 30000.0 167484.322667 129747.661567 10000.0 50000.00 140000.0 240000.00 1000000.0
SEX 30000.0 1.603733 0.489129 1.0 1.00 2.0 2.00 2.0
EDUCATION 30000.0 1.853133 0.790349 0.0 1.00 2.0 2.00 6.0
MARRIAGE 30000.0 1.551867 0.521970 0.0 1.00 2.0 2.00 3.0
AGE 30000.0 35.485500 9.217904 21.0 28.00 34.0 41.00 79.0
PAY_0 30000.0 -0.016700 1.123802 -2.0 -1.00 0.0 0.00 8.0
PAY_2 30000.0 -0.133767 1.197186 -2.0 -1.00 0.0 0.00 8.0
PAY_3 30000.0 -0.166200 1.196868 -2.0 -1.00 0.0 0.00 8.0
PAY_4 30000.0 -0.220667 1.169139 -2.0 -1.00 0.0 0.00 8.0
PAY_5 30000.0 -0.266200 1.133187 -2.0 -1.00 0.0 0.00 8.0
PAY_6 30000.0 -0.291100 1.149988 -2.0 -1.00 0.0 0.00 8.0
BILL_AMT1 30000.0 51223.330900 73635.860576 -165580.0 3558.75 22381.5 67091.00 964511.0
BILL_AMT2 30000.0 49179.075167 71173.768783 -69777.0 2984.75 21200.0 64006.25 983931.0
BILL_AMT3 30000.0 47013.154800 69349.387427 -157264.0 2666.25 20088.5 60164.75 1664089.0
BILL_AMT4 30000.0 43262.948967 64332.856134 -170000.0 2326.75 19052.0 54506.00 891586.0
BILL_AMT5 30000.0 40311.400967 60797.155770 -81334.0 1763.00 18104.5 50190.50 927171.0
BILL_AMT6 30000.0 38871.760400 59554.107537 -339603.0 1256.00 17071.0 49198.25 961664.0
PAY_AMT1 30000.0 5663.580500 16563.280354 0.0 1000.00 2100.0 5006.00 873552.0
PAY_AMT2 30000.0 5921.163500 23040.870402 0.0 833.00 2009.0 5000.00 1684259.0
PAY_AMT3 30000.0 5225.681500 17606.961470 0.0 390.00 1800.0 4505.00 896040.0
PAY_AMT4 30000.0 4826.076867 15666.159744 0.0 296.00 1500.0 4013.25 621000.0
PAY_AMT5 30000.0 4799.387633 15278.305679 0.0 252.50 1500.0 4031.50 426529.0
PAY_AMT6 30000.0 5215.502567 17777.465775 0.0 117.75 1500.0 4000.00 528666.0
default payment next month 30000.0 0.221200 0.415062 0.0 0.00 0.0 0.00 1.0
In [18]:
data.isna().sum()
data.isnull().sum()
Out[18]:
ID                            0
LIMIT_BAL                     0
SEX                           0
EDUCATION                     0
MARRIAGE                      0
AGE                           0
PAY_0                         0
PAY_2                         0
PAY_3                         0
PAY_4                         0
PAY_5                         0
PAY_6                         0
BILL_AMT1                     0
BILL_AMT2                     0
BILL_AMT3                     0
BILL_AMT4                     0
BILL_AMT5                     0
BILL_AMT6                     0
PAY_AMT1                      0
PAY_AMT2                      0
PAY_AMT3                      0
PAY_AMT4                      0
PAY_AMT5                      0
PAY_AMT6                      0
default payment next month    0
dtype: int64
In [19]:
missing = pd.DataFrame((data.isnull().sum())*100/data.shape[0]).reset_index()
plt.figure(figsize=(16,5))
ax = sns.pointplot('index',0,data=missing)
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values")
plt.ylabel("PERCENTAGE")
plt.show()
In [20]:
value=len(data[data.duplicated()])
print("The number of duplicate values in the data set is = ",value)
The number of duplicate values in the data set is =  0
In [21]:
data.rename(columns={'default payment next month' : 'default_payment_next_month'}, inplace=True)
In [22]:
data['default_payment_next_month'].value_counts()
Out[22]:
0    23364
1     6636
Name: default_payment_next_month, dtype: int64
In [23]:
data['default_payment_next_month'].value_counts(normalize=True)
Out[23]:
0    0.7788
1    0.2212
Name: default_payment_next_month, dtype: float64
In [24]:
#plotting the count plot to vizualize the data distribution 
#plot the count plot to check the data distribution
plt.figure(figsize=(10,5))
sns.countplot(x = 'default_payment_next_month', data = data)
Out[24]:
<AxesSubplot:xlabel='default_payment_next_month', ylabel='count'>
In [25]:
data['SEX'].value_counts()
Out[25]:
2    18112
1    11888
Name: SEX, dtype: int64
In [26]:
plt.figure(figsize=(10,5))
sns.countplot(x = 'SEX', data = data)
Out[26]:
<AxesSubplot:xlabel='SEX', ylabel='count'>
In [27]:
data['EDUCATION'].value_counts()
Out[27]:
2    14030
1    10585
3     4917
5      280
4      123
6       51
0       14
Name: EDUCATION, dtype: int64
In [28]:
data["EDUCATION"] = data["EDUCATION"].replace({4:0,5:0,6:0})
data["EDUCATION"].value_counts()
Out[28]:
2    14030
1    10585
3     4917
0      468
Name: EDUCATION, dtype: int64
In [29]:
plt.figure(figsize=(10,5))
sns.countplot(x = 'EDUCATION', data = data)
Out[29]:
<AxesSubplot:xlabel='EDUCATION', ylabel='count'>
In [30]:
data["MARRIAGE"].unique()
Out[30]:
array([1, 2, 3, 0], dtype=int64)
In [31]:
data['MARRIAGE'].value_counts()
Out[31]:
2    15964
1    13659
3      323
0       54
Name: MARRIAGE, dtype: int64
In [32]:
data["MARRIAGE"].value_counts(normalize=True)
Out[32]:
2    0.532133
1    0.455300
3    0.010767
0    0.001800
Name: MARRIAGE, dtype: float64
In [33]:
data["MARRIAGE"] = data["MARRIAGE"].replace({0:3})
data["MARRIAGE"].value_counts(normalize=True)
Out[33]:
2    0.532133
1    0.455300
3    0.012567
Name: MARRIAGE, dtype: float64
In [34]:
plt.figure(figsize=(10,5))
sns.countplot(x = 'MARRIAGE', data=data)
Out[34]:
<AxesSubplot:xlabel='MARRIAGE', ylabel='count'>
In [35]:
data['AGE'].value_counts()
Out[35]:
29    1605
27    1477
28    1409
30    1395
26    1256
31    1217
25    1186
34    1162
32    1158
33    1146
24    1127
35    1113
36    1108
37    1041
39     954
38     944
23     931
40     870
41     824
42     794
44     700
43     670
45     617
46     570
22     560
47     501
48     466
49     452
50     411
51     340
53     325
52     304
54     247
55     209
56     178
58     122
57     122
59      83
60      67
21      67
61      56
62      44
63      31
64      31
66      25
65      24
67      16
69      15
70      10
68       5
73       4
72       3
75       3
71       3
79       1
74       1
Name: AGE, dtype: int64
In [36]:
data.groupby('default_payment_next_month')['AGE'].mean()
Out[36]:
default_payment_next_month
0    35.417266
1    35.725738
Name: AGE, dtype: float64
In [37]:
data= data.astype('int')
In [38]:
plt.figure(figsize=(15,7))
sns.countplot(x = 'AGE', data = data)
plt.show()
In [39]:
plt.figure(figsize=(10,10))
ax = sns.boxplot(x="default_payment_next_month", y="AGE", data=data)
In [40]:
data['LIMIT_BAL'].describe()
Out[40]:
count      30000.000000
mean      167484.322667
std       129747.661567
min        10000.000000
25%        50000.000000
50%       140000.000000
75%       240000.000000
max      1000000.000000
Name: LIMIT_BAL, dtype: float64
In [41]:
plt.figure(figsize=(10,5))
sns.distplot(data['LIMIT_BAL'], kde=True)
plt.show()
In [42]:
sns.barplot(x='default_payment_next_month', y='LIMIT_BAL', data=data)
Out[42]:
<AxesSubplot:xlabel='default_payment_next_month', ylabel='LIMIT_BAL'>
In [43]:
plt.figure(figsize=(10,10))
ax = sns.boxplot(x="default_payment_next_month", y="LIMIT_BAL", data=data)
In [44]:
data.rename(columns={'PAY_0':'PAY_SEPT','PAY_2':'PAY_AUG','PAY_3':'PAY_JUL','PAY_4':'PAY_JUN','PAY_5':'PAY_MAY','PAY_6':'PAY_APR'},inplace=True)
data.rename(columns={'BILL_AMT1':'BILL_AMT_SEPT','BILL_AMT2':'BILL_AMT_AUG','BILL_AMT3':'BILL_AMT_JUL','BILL_AMT4':'BILL_AMT_JUN','BILL_AMT5':'BILL_AMT_MAY','BILL_AMT6':'BILL_AMT_APR'}, inplace = True)
data.rename(columns={'PAY_AMT1':'PAY_AMT_SEPT','PAY_AMT2':'PAY_AMT_AUG','PAY_AMT3':'PAY_AMT_JUL','PAY_AMT4':'PAY_AMT_JUN','PAY_AMT5':'PAY_AMT_MAY','PAY_AMT6':'PAY_AMT_APR'},inplace=True)
In [45]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   ID                          30000 non-null  int32
 1   LIMIT_BAL                   30000 non-null  int32
 2   SEX                         30000 non-null  int32
 3   EDUCATION                   30000 non-null  int32
 4   MARRIAGE                    30000 non-null  int32
 5   AGE                         30000 non-null  int32
 6   PAY_SEPT                    30000 non-null  int32
 7   PAY_AUG                     30000 non-null  int32
 8   PAY_JUL                     30000 non-null  int32
 9   PAY_JUN                     30000 non-null  int32
 10  PAY_MAY                     30000 non-null  int32
 11  PAY_APR                     30000 non-null  int32
 12  BILL_AMT_SEPT               30000 non-null  int32
 13  BILL_AMT_AUG                30000 non-null  int32
 14  BILL_AMT_JUL                30000 non-null  int32
 15  BILL_AMT_JUN                30000 non-null  int32
 16  BILL_AMT_MAY                30000 non-null  int32
 17  BILL_AMT_APR                30000 non-null  int32
 18  PAY_AMT_SEPT                30000 non-null  int32
 19  PAY_AMT_AUG                 30000 non-null  int32
 20  PAY_AMT_JUL                 30000 non-null  int32
 21  PAY_AMT_JUN                 30000 non-null  int32
 22  PAY_AMT_MAY                 30000 non-null  int32
 23  PAY_AMT_APR                 30000 non-null  int32
 24  default_payment_next_month  30000 non-null  int32
dtypes: int32(25)
memory usage: 2.9 MB
In [46]:
data.head()
Out[46]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_SEPT PAY_AUG PAY_JUL PAY_JUN ... BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT PAY_AMT_AUG PAY_AMT_JUL PAY_AMT_JUN PAY_AMT_MAY PAY_AMT_APR default_payment_next_month
0 1 20000 2 2 1 24 2 2 -1 -1 ... 0 0 0 0 689 0 0 0 0 1
1 2 120000 2 2 2 26 -1 2 0 0 ... 3272 3455 3261 0 1000 1000 1000 0 2000 1
2 3 90000 2 2 2 34 0 0 0 0 ... 14331 14948 15549 1518 1500 1000 1000 1000 5000 0
3 4 50000 2 2 1 37 0 0 0 0 ... 28314 28959 29547 2000 2019 1200 1100 1069 1000 0
4 5 50000 1 2 1 57 -1 0 -1 0 ... 20940 19146 19131 2000 36681 10000 9000 689 679 0

5 rows × 25 columns

In [47]:
total_bill_amnt_df = data[['BILL_AMT_SEPT',	'BILL_AMT_AUG',	'BILL_AMT_JUL',	'BILL_AMT_JUN',	'BILL_AMT_MAY',	'BILL_AMT_APR']]
In [48]:
sns.pairplot(data = total_bill_amnt_df)
Out[48]:
<seaborn.axisgrid.PairGrid at 0x27311f253a0>
In [49]:
#plotting the count plot for Previous payment status
pre_payment = ['PAY_SEPT',	'PAY_AUG',	'PAY_JUL',	'PAY_JUN',	'PAY_MAY',	'PAY_APR']
for col in pre_payment:
  plt.figure(figsize=(10,5))
  sns.countplot(x = col, hue = 'default_payment_next_month', data = data)
In [50]:
pay_amnt_df = data[['PAY_AMT_SEPT',	'PAY_AMT_AUG',	'PAY_AMT_JUL',	'PAY_AMT_JUN',	'PAY_AMT_MAY',	'PAY_AMT_APR', 'default_payment_next_month']]
In [51]:
sns.pairplot(data = pay_amnt_df, hue='default_payment_next_month')
Out[51]:
<seaborn.axisgrid.PairGrid at 0x2731645ef70>
In [52]:
data.shape
Out[52]:
(30000, 25)
In [53]:
x,y = 'SEX', 'default_payment_next_month'

(data
.groupby(x)[y]
.value_counts(normalize=True)
.mul(100)
.rename('percent')
.reset_index()
.pipe((sns.catplot,'data'), x=x,y='percent',hue=y,kind='bar'))
Out[53]:
<seaborn.axisgrid.FacetGrid at 0x27316535d00>
In [54]:
x,y = 'EDUCATION', 'default_payment_next_month'

(data
.groupby(x)[y]
.value_counts(normalize=True)
.mul(100)
.rename('percent')
.reset_index()
.pipe((sns.catplot,'data'), x=x,y='percent',hue=y,kind='bar'))
Out[54]:
<seaborn.axisgrid.FacetGrid at 0x27317255070>
In [55]:
x,y = 'MARRIAGE', 'default_payment_next_month'

(data
.groupby(x)[y]
.value_counts(normalize=True)
.mul(100)
.rename('percent')
.reset_index()
.pipe((sns.catplot,'data'), x=x,y='percent',hue=y,kind='bar'))
Out[55]:
<seaborn.axisgrid.FacetGrid at 0x2731733bfd0>
In [56]:
plt.figure(figsize=(19,7))
sns.barplot(x = 'AGE', y = 'default_payment_next_month', data = data)

plt.show()
In [57]:
plt.figure(figsize=(20,15))
sns.heatmap(data.corr(),annot=True,cmap="coolwarm")
Out[57]:
<AxesSubplot:>
In [58]:
from imblearn.over_sampling import SMOTE

smote = SMOTE()

# fit predictor and target variable
x_smote, y_smote = smote.fit_resample(data.iloc[:,0:-1], data['default_payment_next_month'])

print('Original dataset shape', len(data))
print('Resampled dataset shape', len(y_smote))
Original dataset shape 30000
Resampled dataset shape 46728
In [59]:
x_smote 
Out[59]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_SEPT PAY_AUG PAY_JUL PAY_JUN ... BILL_AMT_JUL BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT PAY_AMT_AUG PAY_AMT_JUL PAY_AMT_JUN PAY_AMT_MAY PAY_AMT_APR
0 1 20000 2 2 1 24 2 2 -1 -1 ... 689 0 0 0 0 689 0 0 0 0
1 2 120000 2 2 2 26 -1 2 0 0 ... 2682 3272 3455 3261 0 1000 1000 1000 0 2000
2 3 90000 2 2 2 34 0 0 0 0 ... 13559 14331 14948 15549 1518 1500 1000 1000 1000 5000
3 4 50000 2 2 1 37 0 0 0 0 ... 49291 28314 28959 29547 2000 2019 1200 1100 1069 1000
4 5 50000 1 2 1 57 -1 0 -1 0 ... 35835 20940 19146 19131 2000 36681 10000 9000 689 679
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
46723 13623 396156 2 1 1 28 -2 -1 -1 -1 ... 5711 15896 61653 19694 7542 5740 15969 61823 13134 9864
46724 20354 50000 1 1 2 23 0 0 0 0 ... 3585 3846 1316 1284 1073 1242 3812 1272 9 9
46725 4218 10000 1 1 1 31 1 0 0 0 ... 9343 9136 9930 9368 1840 1373 580 1300 0 844
46726 19207 70000 1 1 2 32 2 0 0 0 ... 37192 38272 38916 39837 2095 2000 1721 1300 1559 1514
46727 26403 40000 1 1 2 26 1 0 0 2 ... 40559 40981 39173 38831 2000 3905 1422 51 1517 1517

46728 rows × 24 columns

In [60]:
columns = list(data.columns)
In [61]:
columns.pop()
Out[61]:
'default_payment_next_month'
In [62]:
balance_df = pd.DataFrame(x_smote, columns=columns)
In [63]:
balance_df['default_payment_next_month'] = y_smote
In [64]:
sns.countplot('default_payment_next_month', data = balance_df)
Out[64]:
<AxesSubplot:xlabel='default_payment_next_month', ylabel='count'>
In [65]:
balance_df[balance_df['default_payment_next_month']==1]
Out[65]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_SEPT PAY_AUG PAY_JUL PAY_JUN ... BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT PAY_AMT_AUG PAY_AMT_JUL PAY_AMT_JUN PAY_AMT_MAY PAY_AMT_APR default_payment_next_month
0 1 20000 2 2 1 24 2 2 -1 -1 ... 0 0 0 0 689 0 0 0 0 1
1 2 120000 2 2 2 26 -1 2 0 0 ... 3272 3455 3261 0 1000 1000 1000 0 2000 1
13 14 70000 1 2 2 30 1 2 2 0 ... 66782 36137 36894 3200 0 3000 3000 1500 0 1
16 17 20000 1 1 2 24 0 0 2 2 ... 18338 17905 19104 3200 0 1500 0 1650 0 1
21 22 120000 2 2 1 39 -1 -1 -1 -1 ... 0 632 316 316 316 0 632 316 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
46723 13623 396156 2 1 1 28 -2 -1 -1 -1 ... 15896 61653 19694 7542 5740 15969 61823 13134 9864 1
46724 20354 50000 1 1 2 23 0 0 0 0 ... 3846 1316 1284 1073 1242 3812 1272 9 9 1
46725 4218 10000 1 1 1 31 1 0 0 0 ... 9136 9930 9368 1840 1373 580 1300 0 844 1
46726 19207 70000 1 1 2 32 2 0 0 0 ... 38272 38916 39837 2095 2000 1721 1300 1559 1514 1
46727 26403 40000 1 1 2 26 1 0 0 2 ... 40981 39173 38831 2000 3905 1422 51 1517 1517 1

23364 rows × 25 columns

In [66]:
credit_df_copy = balance_df.copy()
In [67]:
credit_df_copy['total_Payement_Value'] = credit_df_copy['PAY_SEPT'] + credit_df_copy['PAY_AUG'] + credit_df_copy['PAY_JUL'] + credit_df_copy['PAY_JUN'] + credit_df_copy['PAY_MAY'] + credit_df_copy['PAY_APR']
In [68]:
credit_df_copy.groupby('default_payment_next_month')['total_Payement_Value'].mean()
Out[68]:
default_payment_next_month
0   -1.980140
1    1.682332
Name: total_Payement_Value, dtype: float64
In [69]:
plt.figure(figsize=(10,10))
sns.boxplot(data = credit_df_copy, x = 'default_payment_next_month', y = 'total_Payement_Value' )
Out[69]:
<AxesSubplot:xlabel='default_payment_next_month', ylabel='total_Payement_Value'>
In [70]:
credit_df_copy['Dues'] = (credit_df_copy['BILL_AMT_APR']+credit_df_copy['BILL_AMT_MAY']+credit_df_copy['BILL_AMT_JUN']+credit_df_copy['BILL_AMT_JUL']+credit_df_copy['BILL_AMT_SEPT'])-(credit_df_copy['PAY_AMT_APR']+credit_df_copy['PAY_AMT_MAY']+credit_df_copy['PAY_AMT_JUN']+credit_df_copy['PAY_AMT_JUL']+credit_df_copy['PAY_AMT_AUG']+credit_df_copy['PAY_AMT_SEPT'])
In [71]:
credit_df_copy.groupby('default_payment_next_month')['Dues'].mean()
Out[71]:
default_payment_next_month
0    187742.051532
1    192413.576956
Name: Dues, dtype: float64
In [72]:
credit_df_copy.replace({'SEX': {1 : 'MALE', 2 : 'FEMALE'},
                        'EDUCATION' : {1 : 'graduate school', 2 : 'university', 3 : 'high school', 0 : 'others'},
                        'MARRIAGE' : {1 : 'married', 2 : 'single', 3 : 'others'}}, inplace = True)
In [73]:
credit_df_copy.head()
Out[73]:
ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_SEPT PAY_AUG PAY_JUL PAY_JUN ... BILL_AMT_APR PAY_AMT_SEPT PAY_AMT_AUG PAY_AMT_JUL PAY_AMT_JUN PAY_AMT_MAY PAY_AMT_APR default_payment_next_month total_Payement_Value Dues
0 1 20000 FEMALE university married 24 2 2 -1 -1 ... 0 0 689 0 0 0 0 1 -2 3913
1 2 120000 FEMALE university single 26 -1 2 0 0 ... 3261 0 1000 1000 1000 0 2000 1 3 10352
2 3 90000 FEMALE university single 34 0 0 0 0 ... 15549 1518 1500 1000 1000 1000 5000 0 0 76608
3 4 50000 FEMALE university married 37 0 0 0 0 ... 29547 2000 2019 1200 1100 1069 1000 0 0 174713
4 5 50000 MALE university married 57 -1 0 -1 0 ... 19131 2000 36681 10000 9000 689 679 0 -2 44620

5 rows × 27 columns

In [74]:
credit_df_copy = pd.get_dummies(credit_df_copy,columns=['EDUCATION','MARRIAGE'])
In [75]:
credit_df_copy = pd.get_dummies(credit_df_copy, columns = ['PAY_SEPT',	'PAY_AUG',	'PAY_JUL',	'PAY_JUN',	'PAY_MAY',	'PAY_APR'], drop_first = True )
In [76]:
encoders_nums = {
                 "SEX":{"FEMALE": 0, "MALE": 1}
}
credit_df_copy = credit_df_copy.replace(encoders_nums)
In [77]:
credit_df_copy.drop('ID',axis = 1, inplace = True)
In [78]:
credit_df_copy.columns
Out[78]:
Index(['LIMIT_BAL', 'SEX', 'AGE', 'BILL_AMT_SEPT', 'BILL_AMT_AUG',
       'BILL_AMT_JUL', 'BILL_AMT_JUN', 'BILL_AMT_MAY', 'BILL_AMT_APR',
       'PAY_AMT_SEPT', 'PAY_AMT_AUG', 'PAY_AMT_JUL', 'PAY_AMT_JUN',
       'PAY_AMT_MAY', 'PAY_AMT_APR', 'default_payment_next_month',
       'total_Payement_Value', 'Dues', 'EDUCATION_graduate school',
       'EDUCATION_high school', 'EDUCATION_others', 'EDUCATION_university',
       'MARRIAGE_married', 'MARRIAGE_others', 'MARRIAGE_single', 'PAY_SEPT_-1',
       'PAY_SEPT_0', 'PAY_SEPT_1', 'PAY_SEPT_2', 'PAY_SEPT_3', 'PAY_SEPT_4',
       'PAY_SEPT_5', 'PAY_SEPT_6', 'PAY_SEPT_7', 'PAY_SEPT_8', 'PAY_AUG_-1',
       'PAY_AUG_0', 'PAY_AUG_1', 'PAY_AUG_2', 'PAY_AUG_3', 'PAY_AUG_4',
       'PAY_AUG_5', 'PAY_AUG_6', 'PAY_AUG_7', 'PAY_AUG_8', 'PAY_JUL_-1',
       'PAY_JUL_0', 'PAY_JUL_1', 'PAY_JUL_2', 'PAY_JUL_3', 'PAY_JUL_4',
       'PAY_JUL_5', 'PAY_JUL_6', 'PAY_JUL_7', 'PAY_JUL_8', 'PAY_JUN_-1',
       'PAY_JUN_0', 'PAY_JUN_1', 'PAY_JUN_2', 'PAY_JUN_3', 'PAY_JUN_4',
       'PAY_JUN_5', 'PAY_JUN_6', 'PAY_JUN_7', 'PAY_JUN_8', 'PAY_MAY_-1',
       'PAY_MAY_0', 'PAY_MAY_1', 'PAY_MAY_2', 'PAY_MAY_3', 'PAY_MAY_4',
       'PAY_MAY_5', 'PAY_MAY_6', 'PAY_MAY_7', 'PAY_MAY_8', 'PAY_APR_-1',
       'PAY_APR_0', 'PAY_APR_1', 'PAY_APR_2', 'PAY_APR_3', 'PAY_APR_4',
       'PAY_APR_5', 'PAY_APR_6', 'PAY_APR_7', 'PAY_APR_8'],
      dtype='object')
In [79]:
credit_df_copy.shape
Out[79]:
(46728, 85)
In [80]:
credit_df_copy.head()
Out[80]:
LIMIT_BAL SEX AGE BILL_AMT_SEPT BILL_AMT_AUG BILL_AMT_JUL BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT ... PAY_APR_-1 PAY_APR_0 PAY_APR_1 PAY_APR_2 PAY_APR_3 PAY_APR_4 PAY_APR_5 PAY_APR_6 PAY_APR_7 PAY_APR_8
0 20000 0 24 3913 3102 689 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 120000 0 26 2682 1725 2682 3272 3455 3261 0 ... 0 0 0 1 0 0 0 0 0 0
2 90000 0 34 29239 14027 13559 14331 14948 15549 1518 ... 0 1 0 0 0 0 0 0 0 0
3 50000 0 37 46990 48233 49291 28314 28959 29547 2000 ... 0 1 0 0 0 0 0 0 0 0
4 50000 1 57 8617 5670 35835 20940 19146 19131 2000 ... 0 1 0 0 0 0 0 0 0 0

5 rows × 85 columns

In [81]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc

from sklearn import metrics  
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
In [82]:
#logisticregresion
In [83]:
credit_df_logistic = credit_df_copy.copy()
In [84]:
credit_df_logistic.head()
Out[84]:
LIMIT_BAL SEX AGE BILL_AMT_SEPT BILL_AMT_AUG BILL_AMT_JUL BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT ... PAY_APR_-1 PAY_APR_0 PAY_APR_1 PAY_APR_2 PAY_APR_3 PAY_APR_4 PAY_APR_5 PAY_APR_6 PAY_APR_7 PAY_APR_8
0 20000 0 24 3913 3102 689 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 120000 0 26 2682 1725 2682 3272 3455 3261 0 ... 0 0 0 1 0 0 0 0 0 0
2 90000 0 34 29239 14027 13559 14331 14948 15549 1518 ... 0 1 0 0 0 0 0 0 0 0
3 50000 0 37 46990 48233 49291 28314 28959 29547 2000 ... 0 1 0 0 0 0 0 0 0 0
4 50000 1 57 8617 5670 35835 20940 19146 19131 2000 ... 0 1 0 0 0 0 0 0 0 0

5 rows × 85 columns

In [85]:
X = credit_df_logistic.drop(['default_payment_next_month','total_Payement_Value','Dues'],axis=1)
y = credit_df_logistic['default_payment_next_month']
In [86]:
columns = X.columns
In [87]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
In [88]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify = y)
In [89]:
param_grid = {'penalty':['l1','l2'], 'C' : [0.001, 0.01, 0.1, 1, 10, 100, 1000] }
In [90]:
grid_lr_clf = GridSearchCV(LogisticRegression(), param_grid, scoring = 'accuracy', n_jobs = -1, verbose = 3, cv = 3)
grid_lr_clf.fit(X_train, y_train)
Fitting 3 folds for each of 14 candidates, totalling 42 fits
Out[90]:
GridSearchCV(cv=3, estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                         'penalty': ['l1', 'l2']},
             scoring='accuracy', verbose=3)
In [91]:
optimized_clf = grid_lr_clf.best_estimator_
In [92]:
grid_lr_clf.best_params_
Out[92]:
{'C': 10, 'penalty': 'l2'}
In [93]:
grid_lr_clf.best_score_
Out[93]:
0.7527391545575073
In [94]:
train_preds = optimized_clf.predict_proba(X_train)[:,1]
test_preds = optimized_clf.predict_proba(X_test)[:,1]
In [95]:
train_class_preds = optimized_clf.predict(X_train)
test_class_preds = optimized_clf.predict(X_test)
In [96]:
# Get the accuracy scores
train_accuracy_lr = accuracy_score(train_class_preds,y_train)
test_accuracy_lr = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_lr)
print("The accuracy on test data is ", test_accuracy_lr)
The accuracy on train data is  0.7538250231577602
The accuracy on test data is  0.753323390182219
In [97]:
#print the accuracy,precission,recall,f1,roc_score 
test_accuracy_lr = accuracy_score(test_class_preds,y_test)
test_precision_score_lr = precision_score(test_class_preds,y_test)
test_recall_score_lr = recall_score(test_class_preds,y_test)
test_f1_score_lr = f1_score(test_class_preds,y_test)
test_roc_score_lr = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_lr)
print("The precision on test data is ", test_precision_score_lr)
print("The recall on test data is ", test_recall_score_lr)
print("The f1 on test data is ", test_f1_score_lr)
print("The roc_score on test data is ", test_roc_score_lr)
The accuracy on test data is  0.753323390182219
The precision on test data is  0.6878080415045396
The recall on test data is  0.7914925373134328
The f1 on test data is  0.7360166551006245
The roc_score on test data is  0.7577460393252177
In [98]:
cm_lr = confusion_matrix(y_train, train_class_preds)
print(cm_lr)
[[12821  2832]
 [ 4875 10779]]
In [99]:
labels = ['Not Defaulter', 'Defaulter']
ax= plt.subplot()
sns.heatmap(cm_lr, annot=True, ax = ax) #annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)
Out[99]:
[Text(0, 0.5, 'Not Defaulter'), Text(0, 1.5, 'Defaulter')]
In [100]:
feature_importance = pd.DataFrame({'Features':columns, 'Importance':np.abs(optimized_clf.coef_).ravel() })
In [101]:
feature_importance = feature_importance.sort_values(by = 'Importance', ascending=False)[:10]
In [102]:
plt.bar(height=feature_importance['Importance'], x= feature_importance['Features'])
plt.xticks(rotation=80)
plt.title("Feature importances via coefficients")
plt.show()
In [103]:
y_preds_proba_lr = optimized_clf.predict_proba(X_test)[::,1]
In [104]:
y_pred_proba = y_preds_proba_lr
fpr, tpr, _ = roc_curve(y_test,  y_pred_proba)
auc = roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()
In [105]:
def G(v):
    bins = np.linspace(0., 100., 11)
    total = float(np.sum(v))
    yvals = []
    for b in bins:
        bin_vals = v[v <= np.percentile(v, b)]
        bin_fraction = (np.sum(bin_vals) / total) * 100.0
        yvals.append(bin_fraction)
    # perfect equality area
    pe_area = np.trapz(bins, x=bins)
    # lorenz area
    lorenz_area = np.trapz(yvals, x=bins)
    gini_val = (pe_area - lorenz_area) / float(pe_area)
    return bins, yvals, gini_val
bins, result, gini_val = G(y_preds_proba_lr)
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(bins, result, label="observed")
plt.plot(bins, bins, '--', label="perfect eq.")
plt.xlabel("fraction of population")
plt.ylabel("fraction of wealth")
plt.title("GINI: %.4f" %(gini_val))
                                                                          
Out[105]:
Text(0.5, 1.0, 'GINI: 0.3203')
In [106]:
from sklearn.ensemble import RandomForestClassifier
In [107]:
X = credit_df_copy.drop(['default_payment_next_month','total_Payement_Value','Dues'],axis=1)
y = credit_df_copy['default_payment_next_month']
In [108]:
rf_clf = RandomForestClassifier()
rf_clf.fit(X_train,y_train)
Out[108]:
RandomForestClassifier()
In [109]:
train_class_preds = rf_clf.predict(X_train)
test_class_preds = rf_clf.predict(X_test)
In [110]:
train_accuracy_rf = accuracy_score(train_class_preds,y_train)
test_accuracy_rf = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_rf)
print("The accuracy on test data is ", test_accuracy_rf)
The accuracy on train data is  0.999361165234612
The accuracy on test data is  0.8314635886129305
In [111]:
test_accuracy_rf = accuracy_score(test_class_preds,y_test)
test_precision_score_rf = precision_score(test_class_preds,y_test)
test_recall_score_rf = recall_score(test_class_preds,y_test)
test_f1_score_rf = f1_score(test_class_preds,y_test)
test_roc_score_rf = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_rf)
print("The precision on test data is ", test_precision_score_rf)
print("The recall on test data is ", test_recall_score_rf)
print("The f1 on test data is ", test_f1_score_rf)
print("The roc_score on test data is ", test_roc_score_rf)
The accuracy on test data is  0.8314635886129305
The precision on test data is  0.7989623865110247
The recall on test data is  0.8544874462477459
The f1 on test data is  0.8257926134459415
The roc_score on test data is  0.8328696364214861
In [112]:
param_grid = {'n_estimators': [100,150,200], 'max_depth': [10,20,30]}
In [113]:
grid_rf_clf = GridSearchCV(RandomForestClassifier(), param_grid, scoring = 'accuracy', n_jobs = -1, verbose = 3, cv = 3)
grid_rf_clf.fit(X_train, y_train)
Fitting 3 folds for each of 9 candidates, totalling 27 fits
Out[113]:
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': [10, 20, 30],
                         'n_estimators': [100, 150, 200]},
             scoring='accuracy', verbose=3)
In [114]:
grid_rf_clf.best_score_
Out[114]:
0.8233941501745736
In [115]:
grid_rf_clf.best_params_
Out[115]:
{'max_depth': 30, 'n_estimators': 150}
In [116]:
optimal_rf_clf = grid_rf_clf.best_estimator_
In [117]:
train_class_preds = optimal_rf_clf.predict(X_train)
test_class_preds = optimal_rf_clf.predict(X_test)
In [118]:
train_accuracy_rf = accuracy_score(train_class_preds,y_train)
test_accuracy_rf = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_rf)
print("The accuracy on test data is ", test_accuracy_rf)
The accuracy on train data is  0.9984667965630689
The accuracy on test data is  0.8323065949030543
In [119]:
test_accuracy_rf = accuracy_score(test_class_preds,y_test)
test_precision_score_rf = precision_score(test_class_preds,y_test)
test_recall_score_rf = recall_score(test_class_preds,y_test)
test_f1_score_rf = f1_score(test_class_preds,y_test)
test_roc_score_rf = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_rf)
print("The precision on test data is ", test_precision_score_rf)
print("The recall on test data is ", test_recall_score_rf)
print("The f1 on test data is ", test_f1_score_rf)
print("The roc_score on test data is ", test_roc_score_rf)
The accuracy on test data is  0.8323065949030543
The precision on test data is  0.800129701686122
The recall on test data is  0.8551427779317993
The f1 on test data is  0.8267220584293755
The roc_score on test data is  0.8336881185869549
In [120]:
cm_rf = confusion_matrix(y_train, train_class_preds)
print(cm_rf)
[[15636    17]
 [   31 15623]]
In [121]:
labels = ['Not Defaulter', 'Defaulter']
ax= plt.subplot()
sns.heatmap(cm_rf, annot=True, ax = ax) #annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)
Out[121]:
[Text(0, 0.5, 'Not Defaulter'), Text(0, 1.5, 'Defaulter')]
In [122]:
len(optimal_rf_clf.feature_importances_)
Out[122]:
82
In [123]:
feature_importances_rf = pd.DataFrame(optimal_rf_clf.feature_importances_,
                                   index = columns,
                                    columns=['importance_rf']).sort_values('importance_rf',
                                                                        ascending=False)[:10]
                                    
plt.subplots(figsize=(17,6))
plt.title("Feature importances")
plt.bar(feature_importances_rf.index, feature_importances_rf['importance_rf'],
        color="g",  align="center")
plt.xticks(feature_importances_rf.index, rotation = 85)
#plt.xlim([-1, X.shape[1]])
plt.show()
In [124]:
train_class_preds = optimal_rf_clf.predict(X_train)
test_class_preds = optimal_rf_clf.predict(X_test)
In [125]:
y_preds_proba_rf = optimal_rf_clf.predict_proba(X_test)[::,1]
In [126]:
import sklearn.metrics as metrics
y_pred_proba = y_preds_proba_rf
fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()
In [127]:
def G(v):
    bins = np.linspace(0., 100., 11)
    total = float(np.sum(v))
    yvals = []
    for b in bins:
        bin_vals = v[v <= np.percentile(v, b)]
        bin_fraction = (np.sum(bin_vals) / total) * 100.0
        yvals.append(bin_fraction)
    # perfect equality area
    pe_area = np.trapz(bins, x=bins)
    # lorenz area
    lorenz_area = np.trapz(yvals, x=bins)
    gini_val = (pe_area - lorenz_area) / float(pe_area)
    return bins, yvals, gini_val
bins, result, gini_val = G(y_preds_proba_rf)
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(bins, result, label="observed")
plt.plot(bins, bins, '--', label="perfect eq.")
plt.xlabel("fraction of population")
plt.ylabel("fraction of wealth")
plt.title("GINI: %.4f" %(gini_val))
Out[127]:
Text(0.5, 1.0, 'GINI: 0.3368')
In [128]:
from sklearn.tree import DecisionTreeClassifier#cartmodel
In [129]:
credit_df_cart=credit_df_copy.copy()
In [130]:
credit_df_cart.head()
Out[130]:
LIMIT_BAL SEX AGE BILL_AMT_SEPT BILL_AMT_AUG BILL_AMT_JUL BILL_AMT_JUN BILL_AMT_MAY BILL_AMT_APR PAY_AMT_SEPT ... PAY_APR_-1 PAY_APR_0 PAY_APR_1 PAY_APR_2 PAY_APR_3 PAY_APR_4 PAY_APR_5 PAY_APR_6 PAY_APR_7 PAY_APR_8
0 20000 0 24 3913 3102 689 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 120000 0 26 2682 1725 2682 3272 3455 3261 0 ... 0 0 0 1 0 0 0 0 0 0
2 90000 0 34 29239 14027 13559 14331 14948 15549 1518 ... 0 1 0 0 0 0 0 0 0 0
3 50000 0 37 46990 48233 49291 28314 28959 29547 2000 ... 0 1 0 0 0 0 0 0 0 0
4 50000 1 57 8617 5670 35835 20940 19146 19131 2000 ... 0 1 0 0 0 0 0 0 0 0

5 rows × 85 columns

In [131]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify = y)
In [132]:
clf = DecisionTreeClassifier()
clf = clf.fit(X_train,y_train)
In [133]:
y_pred = clf.predict(X_test)
In [134]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
result = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(result)
result1 = classification_report(y_test, y_pred)
print("Classification Report:",)
print (result1)
result2 = accuracy_score(y_test,y_pred)
print("Accuracy:",result2)
Confusion Matrix:
[[5598 2113]
 [1871 5839]]
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.73      0.74      7711
           1       0.73      0.76      0.75      7710

    accuracy                           0.74     15421
   macro avg       0.74      0.74      0.74     15421
weighted avg       0.74      0.74      0.74     15421

Accuracy: 0.7416509953958887
In [135]:
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier 
from sklearn import tree

text_representation = tree.export_text(clf)
print(text_representation)


with open("decistion_tree.log", "w") as fout:
    fout.write(text_representation)


fig = plt.figure(figsize=(25,20))
_ = tree.plot_tree(clf,feature_names=X.columns,filled=True)
|--- feature_34 <= 0.50
|   |--- feature_35 <= 0.50
|   |   |--- feature_19 <= 0.50
|   |   |   |--- feature_25 <= 0.50
|   |   |   |   |--- feature_10 <= 1996.00
|   |   |   |   |   |--- feature_1 <= 0.50
|   |   |   |   |   |   |--- feature_64 <= 0.50
|   |   |   |   |   |   |   |--- feature_55 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_44 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 738.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 383.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 31
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  383.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 15
|   |   |   |   |   |   |   |   |   |--- feature_9 >  738.50
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 26573.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 22
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  26573.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 16
|   |   |   |   |   |   |   |   |--- feature_44 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_55 >  0.50
|   |   |   |   |   |   |   |   |--- feature_11 <= 750.00
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 3538.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 163.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  163.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_9 >  3538.50
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 6915.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  6915.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_11 >  750.00
|   |   |   |   |   |   |   |   |   |--- feature_10 <= 1101.00
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 22.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  22.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |--- feature_10 >  1101.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_64 >  0.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_1 >  0.50
|   |   |   |   |   |   |--- feature_44 <= 0.50
|   |   |   |   |   |   |   |--- feature_74 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_9 <= 4818.00
|   |   |   |   |   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 16224.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 39
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  16224.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 17
|   |   |   |   |   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_9 >  4818.00
|   |   |   |   |   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_13 <= 5350.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |   |--- feature_13 >  5350.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_74 >  0.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_44 >  0.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_10 >  1996.00
|   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |--- feature_24 <= 0.50
|   |   |   |   |   |   |   |--- feature_74 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 149429.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 140058.00
|   |   |   |   |   |   |   |   |   |   |--- feature_1 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 27
|   |   |   |   |   |   |   |   |   |   |--- feature_1 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 24
|   |   |   |   |   |   |   |   |   |--- feature_0 >  140058.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_0 >  149429.50
|   |   |   |   |   |   |   |   |   |--- feature_4 <= 224882.00
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 1955.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 15
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  1955.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 25
|   |   |   |   |   |   |   |   |   |--- feature_4 >  224882.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 396552.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  396552.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |--- feature_74 >  0.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_24 >  0.50
|   |   |   |   |   |   |   |--- feature_33 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_9 <= 1027.00
|   |   |   |   |   |   |   |   |   |--- feature_65 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 7195.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  7195.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_65 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= 843.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  843.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_9 >  1027.00
|   |   |   |   |   |   |   |   |   |--- feature_3 <= -5523.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_3 >  -5523.00
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= -176.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  -176.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |--- feature_33 >  0.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 450000.00
|   |   |   |   |   |   |   |   |   |--- feature_11 <= 59101.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_11 >  59101.00
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 28.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  28.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_0 >  450000.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |--- class: 1
|   |   |   |--- feature_25 >  0.50
|   |   |   |   |--- feature_42 <= 0.50
|   |   |   |   |   |--- feature_2 <= 52.50
|   |   |   |   |   |   |--- feature_17 <= 0.50
|   |   |   |   |   |   |   |--- feature_3 <= 1114.50
|   |   |   |   |   |   |   |   |--- feature_10 <= 8.00
|   |   |   |   |   |   |   |   |   |--- feature_46 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_46 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_10 >  8.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_3 >  1114.50
|   |   |   |   |   |   |   |   |--- feature_12 <= 59718.50
|   |   |   |   |   |   |   |   |   |--- feature_47 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 1719.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  1719.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 16
|   |   |   |   |   |   |   |   |   |--- feature_47 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_12 >  59718.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_17 >  0.50
|   |   |   |   |   |   |   |--- feature_4 <= 45102.00
|   |   |   |   |   |   |   |   |--- feature_13 <= 1026.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_13 >  1026.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_4 >  45102.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_2 >  52.50
|   |   |   |   |   |   |--- feature_14 <= 318.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_14 >  318.50
|   |   |   |   |   |   |   |--- feature_13 <= 2710.00
|   |   |   |   |   |   |   |   |--- feature_2 <= 61.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_2 >  61.00
|   |   |   |   |   |   |   |   |   |--- feature_7 <= 29543.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_7 >  29543.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_13 >  2710.00
|   |   |   |   |   |   |   |   |--- feature_11 <= 5663.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_11 >  5663.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- feature_42 >  0.50
|   |   |   |   |   |--- feature_2 <= 25.50
|   |   |   |   |   |   |--- feature_12 <= 2314.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_12 >  2314.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_2 >  25.50
|   |   |   |   |   |   |--- feature_11 <= 5881.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_11 >  5881.00
|   |   |   |   |   |   |   |--- feature_0 <= 60000.00
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_0 >  60000.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |--- feature_19 >  0.50
|   |   |   |--- feature_1 <= 0.50
|   |   |   |   |--- feature_9 <= 1999.00
|   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |--- feature_0 <= 49049.50
|   |   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_32 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 1976.50
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 31317.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  31317.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_13 >  1976.50
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 2059.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  2059.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_32 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 931.50
|   |   |   |   |   |   |   |   |   |   |--- feature_6 <= 371.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_6 >  371.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_6 >  931.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |   |--- feature_3 <= 3716.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_3 >  3716.50
|   |   |   |   |   |   |   |   |   |--- feature_17 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 1035.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  1035.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |   |--- feature_17 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_0 >  49049.50
|   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 165730.00
|   |   |   |   |   |   |   |   |   |--- feature_8 <= 2.50
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= -47.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  -47.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |--- feature_8 >  2.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 978.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  978.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |--- feature_0 >  165730.00
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 1424.00
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 2461.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 22
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  2461.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |   |--- feature_6 >  1424.00
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= 171109.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  171109.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_33 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 1900.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 24
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  1900.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |   |--- feature_33 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 7312.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  7312.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 6674.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 4994.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  4994.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_3 >  6674.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 96767.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  96767.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |--- feature_25 <= 0.50
|   |   |   |   |   |   |   |--- feature_11 <= 511.00
|   |   |   |   |   |   |   |   |--- feature_6 <= 2.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 1553.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 25000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  25000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |--- feature_3 >  1553.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 6468.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  6468.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_6 >  2.50
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 323.00
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 235.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  235.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_6 >  323.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 115000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  115000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |--- feature_11 >  511.00
|   |   |   |   |   |   |   |   |--- feature_44 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_4 <= 69088.50
|   |   |   |   |   |   |   |   |   |   |--- feature_62 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_62 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |--- feature_4 >  69088.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_44 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_25 >  0.50
|   |   |   |   |   |   |   |--- feature_7 <= 7221.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_7 >  7221.00
|   |   |   |   |   |   |   |   |--- feature_2 <= 22.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_2 >  22.50
|   |   |   |   |   |   |   |   |   |--- feature_10 <= 1900.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_10 >  1900.00
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= 27947.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  27947.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- feature_9 >  1999.00
|   |   |   |   |   |--- feature_25 <= 0.50
|   |   |   |   |   |   |--- feature_44 <= 0.50
|   |   |   |   |   |   |   |--- feature_2 <= 33.50
|   |   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 78752.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 50365.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  50365.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |--- feature_0 >  78752.50
|   |   |   |   |   |   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 18
|   |   |   |   |   |   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 177750.50
|   |   |   |   |   |   |   |   |   |   |--- feature_10 <= 2998.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_10 >  2998.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |--- feature_0 >  177750.50
|   |   |   |   |   |   |   |   |   |   |--- feature_6 <= 180991.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_6 >  180991.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |--- feature_2 >  33.50
|   |   |   |   |   |   |   |   |--- feature_74 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_11 <= 4985.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 4822.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 23
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  4822.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_11 >  4985.00
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 200917.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 19
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  200917.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |--- feature_74 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_44 >  0.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_25 >  0.50
|   |   |   |   |   |   |--- feature_13 <= 15318.50
|   |   |   |   |   |   |   |--- feature_3 <= 919.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 200000.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_0 >  200000.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_3 >  919.50
|   |   |   |   |   |   |   |   |--- feature_9 <= 2103.50
|   |   |   |   |   |   |   |   |   |--- feature_5 <= 44253.00
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 2500.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  2500.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_5 >  44253.00
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 1722.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  1722.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_9 >  2103.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 67640.50
|   |   |   |   |   |   |   |   |   |   |--- feature_13 <= 4414.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_13 >  4414.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_3 >  67640.50
|   |   |   |   |   |   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |--- feature_13 >  15318.50
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |--- feature_1 >  0.50
|   |   |   |   |--- feature_10 <= 3998.00
|   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |--- feature_0 <= 199257.00
|   |   |   |   |   |   |   |--- feature_2 <= 40.50
|   |   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 109709.50
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 3.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  3.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 16
|   |   |   |   |   |   |   |   |   |--- feature_0 >  109709.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 2821.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  2821.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_12 <= 9972.00
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |--- feature_12 >  9972.00
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 78497.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  78497.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_2 >  40.50
|   |   |   |   |   |   |   |   |--- feature_24 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 56.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 17
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  56.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_24 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 1.50
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 48.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  48.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_3 >  1.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_0 >  199257.00
|   |   |   |   |   |   |   |--- feature_9 <= 998.50
|   |   |   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 388.50
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 1.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 15
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  1.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |--- feature_13 >  388.50
|   |   |   |   |   |   |   |   |   |   |--- feature_53 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |   |--- feature_53 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 11149.50
|   |   |   |   |   |   |   |   |   |   |--- feature_17 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |   |--- feature_17 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_3 >  11149.50
|   |   |   |   |   |   |   |   |   |   |--- feature_6 <= 37.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_6 >  37.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |--- feature_9 >  998.50
|   |   |   |   |   |   |   |   |--- feature_64 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 914.00
|   |   |   |   |   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_3 >  914.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 3137.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 16
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  3137.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |--- feature_64 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |--- feature_0 <= 109003.00
|   |   |   |   |   |   |   |--- feature_2 <= 41.50
|   |   |   |   |   |   |   |   |--- feature_8 <= 50033.50
|   |   |   |   |   |   |   |   |   |--- feature_4 <= 39109.50
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 1211.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  1211.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |--- feature_4 >  39109.50
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= 20061.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  20061.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |--- feature_8 >  50033.50
|   |   |   |   |   |   |   |   |   |--- feature_33 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_33 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 75784.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  75784.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |--- feature_2 >  41.50
|   |   |   |   |   |   |   |   |--- feature_64 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 1168.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 65000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  65000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_9 >  1168.00
|   |   |   |   |   |   |   |   |   |   |--- feature_46 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |   |--- feature_46 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_64 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_0 >  109003.00
|   |   |   |   |   |   |   |--- feature_25 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_11 <= 567.50
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 16.50
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 60.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  60.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_6 >  16.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 124349.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  124349.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |--- feature_11 >  567.50
|   |   |   |   |   |   |   |   |   |--- feature_5 <= 92460.00
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 12196.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  12196.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_5 >  92460.00
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 95705.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  95705.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_25 >  0.50
|   |   |   |   |   |   |   |   |--- feature_6 <= 506.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_6 >  506.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_10 >  3998.00
|   |   |   |   |   |--- feature_33 <= 0.50
|   |   |   |   |   |   |--- feature_9 <= 7194.00
|   |   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_4 <= 7156.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 279610.50
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 3843.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  3843.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |--- feature_0 >  279610.50
|   |   |   |   |   |   |   |   |   |   |--- feature_24 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- feature_24 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_4 >  7156.50
|   |   |   |   |   |   |   |   |   |--- feature_12 <= 28.00
|   |   |   |   |   |   |   |   |   |   |--- feature_13 <= 2921.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_13 >  2921.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_12 >  28.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |   |--- feature_6 <= 8506.50
|   |   |   |   |   |   |   |   |   |--- feature_2 <= 59.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_2 >  59.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_6 >  8506.50
|   |   |   |   |   |   |   |   |   |--- feature_14 <= 4254.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_14 >  4254.50
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 4447.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  4447.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_9 >  7194.00
|   |   |   |   |   |   |   |--- feature_7 <= 127951.00
|   |   |   |   |   |   |   |   |--- feature_2 <= 27.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_2 >  27.50
|   |   |   |   |   |   |   |   |   |--- feature_11 <= 5993.50
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 15511.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  15511.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_11 >  5993.50
|   |   |   |   |   |   |   |   |   |   |--- feature_64 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |   |--- feature_64 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_7 >  127951.00
|   |   |   |   |   |   |   |   |--- feature_10 <= 94158.00
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 170000.00
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_0 >  170000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 400000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  400000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_10 >  94158.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_33 >  0.50
|   |   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |   |--- feature_22 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_2 <= 56.50
|   |   |   |   |   |   |   |   |   |--- feature_8 <= 52.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_8 >  52.50
|   |   |   |   |   |   |   |   |   |   |--- feature_13 <= 39.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_13 >  39.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |--- feature_2 >  56.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_22 >  0.50
|   |   |   |   |   |   |   |   |--- feature_4 <= 8520.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 80000.00
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_0 >  80000.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_4 >  8520.50
|   |   |   |   |   |   |   |   |   |--- feature_7 <= 144459.50
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 17356.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  17356.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_7 >  144459.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 169820.50
|   |   |   |   |   |   |   |   |   |--- feature_12 <= 4971.00
|   |   |   |   |   |   |   |   |   |   |--- feature_10 <= 4000.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_10 >  4000.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 15
|   |   |   |   |   |   |   |   |   |--- feature_12 >  4971.00
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 5229.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  5229.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |--- feature_0 >  169820.50
|   |   |   |   |   |   |   |   |   |--- feature_2 <= 45.50
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 157315.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  157315.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |--- feature_2 >  45.50
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 7148.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  7148.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |   |   |--- feature_5 <= 15408.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_5 >  15408.50
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 1870.00
|   |   |   |   |   |   |   |   |   |   |--- feature_53 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_53 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_9 >  1870.00
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 194588.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  194588.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |--- feature_35 >  0.50
|   |   |--- feature_42 <= 0.50
|   |   |   |--- feature_24 <= 0.50
|   |   |   |   |--- feature_22 <= 0.50
|   |   |   |   |   |--- feature_64 <= 0.50
|   |   |   |   |   |   |--- feature_19 <= 0.50
|   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_73 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_14 <= 5062.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 16167.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  16167.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_14 >  5062.00
|   |   |   |   |   |   |   |   |   |   |--- feature_10 <= 6550.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |   |--- feature_10 >  6550.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_73 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 2017.00
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 29742.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  29742.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |   |--- feature_13 >  2017.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 225000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  225000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |--- feature_4 <= 93.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 3033.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 430000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  430000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_13 >  3033.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_4 >  93.50
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 370000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 66.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  66.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_0 >  370000.00
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_19 >  0.50
|   |   |   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_5 <= 158.00
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 175000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_1 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_1 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_0 >  175000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 88.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  88.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_5 >  158.00
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 28000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 4894.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  4894.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 12
|   |   |   |   |   |   |   |   |   |--- feature_13 >  28000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 378.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  378.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 45000.00
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 3490.00
|   |   |   |   |   |   |   |   |   |   |--- feature_10 <= 4667.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |   |--- feature_10 >  4667.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_9 >  3490.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 1650.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  1650.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_0 >  45000.00
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 46058.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 115000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  115000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_6 >  46058.00
|   |   |   |   |   |   |   |   |   |   |--- feature_6 <= 135721.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |   |   |--- feature_6 >  135721.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |--- feature_64 >  0.50
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_22 >  0.50
|   |   |   |   |   |--- feature_45 <= 0.50
|   |   |   |   |   |   |--- feature_10 <= 3679.50
|   |   |   |   |   |   |   |--- feature_6 <= 9332.50
|   |   |   |   |   |   |   |   |--- feature_11 <= 1665.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_11 >  1665.50
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 2679.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_6 >  2679.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_6 >  9332.50
|   |   |   |   |   |   |   |   |--- feature_4 <= 8840.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_4 >  8840.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 500.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 811.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  811.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_13 >  500.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_10 >  3679.50
|   |   |   |   |   |   |   |--- feature_52 <= 0.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_52 >  0.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_45 >  0.50
|   |   |   |   |   |   |--- feature_0 <= 125000.00
|   |   |   |   |   |   |   |--- feature_1 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_13 <= 458.00
|   |   |   |   |   |   |   |   |   |--- feature_2 <= 30.50
|   |   |   |   |   |   |   |   |   |   |--- feature_18 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_18 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_2 >  30.50
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 675.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  675.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_13 >  458.00
|   |   |   |   |   |   |   |   |   |--- feature_6 <= 250.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_6 >  250.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_1 >  0.50
|   |   |   |   |   |   |   |   |--- feature_5 <= 13012.00
|   |   |   |   |   |   |   |   |   |--- feature_4 <= 1228.00
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 100.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  100.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_4 >  1228.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_5 >  13012.00
|   |   |   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_0 >  125000.00
|   |   |   |   |   |   |   |--- feature_12 <= 1952.00
|   |   |   |   |   |   |   |   |--- feature_8 <= -838.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_8 >  -838.50
|   |   |   |   |   |   |   |   |   |--- feature_52 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_52 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 185000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  185000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |--- feature_12 >  1952.00
|   |   |   |   |   |   |   |   |--- feature_14 <= 4311.50
|   |   |   |   |   |   |   |   |   |--- feature_4 <= 1184.00
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 660.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  660.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_4 >  1184.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_14 >  4311.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |--- feature_24 >  0.50
|   |   |   |   |--- feature_74 <= 0.50
|   |   |   |   |   |--- feature_9 <= 67.50
|   |   |   |   |   |   |--- feature_0 <= 45000.00
|   |   |   |   |   |   |   |--- feature_10 <= 986.50
|   |   |   |   |   |   |   |   |--- feature_8 <= 141.00
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 4188.00
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_3 >  4188.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_8 >  141.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_10 >  986.50
|   |   |   |   |   |   |   |   |--- feature_8 <= 39300.50
|   |   |   |   |   |   |   |   |   |--- feature_8 <= 29967.50
|   |   |   |   |   |   |   |   |   |   |--- feature_7 <= 28919.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |   |   |--- feature_7 >  28919.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_8 >  29967.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_8 >  39300.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_0 >  45000.00
|   |   |   |   |   |   |   |--- feature_54 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_75 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_19 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 3800.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  3800.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |--- feature_19 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_13 <= 2550.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 14
|   |   |   |   |   |   |   |   |   |   |--- feature_13 >  2550.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 11
|   |   |   |   |   |   |   |   |--- feature_75 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_12 <= 1900.00
|   |   |   |   |   |   |   |   |   |   |--- feature_10 <= 5512.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_10 >  5512.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_12 >  1900.00
|   |   |   |   |   |   |   |   |   |   |--- feature_2 <= 26.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_2 >  26.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |--- feature_54 >  0.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_9 >  67.50
|   |   |   |   |   |   |--- feature_10 <= 31.00
|   |   |   |   |   |   |   |--- feature_12 <= 2401.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 55000.00
|   |   |   |   |   |   |   |   |   |--- feature_72 <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- feature_9 <= 1012.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_9 >  1012.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 9
|   |   |   |   |   |   |   |   |   |--- feature_72 >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_0 >  55000.00
|   |   |   |   |   |   |   |   |   |--- feature_8 <= 19764.50
|   |   |   |   |   |   |   |   |   |   |--- feature_6 <= 3214.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_6 >  3214.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_8 >  19764.50
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 1977.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  1977.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |--- feature_12 >  2401.50
|   |   |   |   |   |   |   |   |--- feature_3 <= 52156.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 4249.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |--- feature_3 >  4249.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_3 >  52156.50
|   |   |   |   |   |   |   |   |   |--- feature_13 <= 3554.00
|   |   |   |   |   |   |   |   |   |   |--- feature_3 <= 129362.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- feature_3 >  129362.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |--- feature_13 >  3554.00
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 11009.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  11009.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_10 >  31.00
|   |   |   |   |   |   |   |--- feature_19 <= 0.50
|   |   |   |   |   |   |   |   |--- feature_10 <= 3550.00
|   |   |   |   |   |   |   |   |   |--- feature_9 <= 995.50
|   |   |   |   |   |   |   |   |   |   |--- feature_72 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 4
|   |   |   |   |   |   |   |   |   |   |--- feature_72 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_9 >  995.50
|   |   |   |   |   |   |   |   |   |   |--- feature_8 <= 32427.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- feature_8 >  32427.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 8
|   |   |   |   |   |   |   |   |--- feature_10 >  3550.00
|   |   |   |   |   |   |   |   |   |--- feature_14 <= 5016.50
|   |   |   |   |   |   |   |   |   |   |--- feature_4 <= 90814.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- feature_4 >  90814.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |--- feature_14 >  5016.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 9318.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  9318.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |--- feature_19 >  0.50
|   |   |   |   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 238774.50
|   |   |   |   |   |   |   |   |   |   |--- feature_11 <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |   |--- feature_11 >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 13
|   |   |   |   |   |   |   |   |   |--- feature_3 >  238774.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 336272.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  336272.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |   |   |   |--- feature_14 <= 1932.50
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 615.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  615.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_14 >  1932.50
|   |   |   |   |   |   |   |   |   |   |--- feature_14 <= 4072.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_14 >  4072.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |--- feature_74 >  0.50
|   |   |   |   |   |--- class: 1
|   |   |--- feature_42 >  0.50
|   |   |   |--- feature_5 <= 329.50
|   |   |   |   |--- feature_2 <= 46.00
|   |   |   |   |   |--- feature_0 <= 325000.00
|   |   |   |   |   |   |--- feature_73 <= 0.50
|   |   |   |   |   |   |   |--- feature_7 <= 81.50
|   |   |   |   |   |   |   |   |--- feature_0 <= 140000.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_0 >  140000.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_7 >  81.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_73 >  0.50
|   |   |   |   |   |   |   |--- feature_5 <= 252.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_5 >  252.00
|   |   |   |   |   |   |   |   |--- feature_15 <= 0.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_15 >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_0 >  325000.00
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- feature_2 >  46.00
|   |   |   |   |   |--- class: 0
|   |   |   |--- feature_5 >  329.50
|   |   |   |   |--- feature_11 <= 3579.50
|   |   |   |   |   |--- feature_23 <= 0.50
|   |   |   |   |   |   |--- feature_7 <= 713.00
|   |   |   |   |   |   |   |--- feature_10 <= 2222.50
|   |   |   |   |   |   |   |   |--- feature_10 <= 1900.00
|   |   |   |   |   |   |   |   |   |--- feature_3 <= 14331.50
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 35000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  35000.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |--- feature_3 >  14331.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_10 >  1900.00
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- feature_10 >  2222.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_7 >  713.00
|   |   |   |   |   |   |   |--- feature_7 <= 904.00
|   |   |   |   |   |   |   |   |--- feature_4 <= 890.00
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 40000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_0 <= 15000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |   |   |--- feature_0 >  15000.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_0 >  40000.00
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- feature_4 >  890.00
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_7 >  904.00
|   |   |   |   |   |   |   |   |--- feature_9 <= 613.00
|   |   |   |   |   |   |   |   |   |--- feature_0 <= 205000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_5 <= 1348.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |   |--- feature_5 >  1348.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 7
|   |   |   |   |   |   |   |   |   |--- feature_0 >  205000.00
|   |   |   |   |   |   |   |   |   |   |--- feature_12 <= 5852.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 5
|   |   |   |   |   |   |   |   |   |   |--- feature_12 >  5852.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |--- feature_9 >  613.00
|   |   |   |   |   |   |   |   |   |--- feature_2 <= 24.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- feature_2 >  24.50
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_23 >  0.50
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_11 >  3579.50
|   |   |   |   |   |--- feature_16 <= 0.50
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_16 >  0.50
|   |   |   |   |   |   |--- feature_3 <= 3299.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_3 >  3299.50
|   |   |   |   |   |   |   |--- class: 0
|--- feature_34 >  0.50
|   |--- feature_4 <= -938.00
|   |   |--- feature_73 <= 0.50
|   |   |   |--- feature_14 <= 2610.00
|   |   |   |   |--- class: 1
|   |   |   |--- feature_14 >  2610.00
|   |   |   |   |--- class: 0
|   |   |--- feature_73 >  0.50
|   |   |   |--- class: 0
|   |--- feature_4 >  -938.00
|   |   |--- feature_10 <= 157632.00
|   |   |   |--- feature_4 <= 0.50
|   |   |   |   |--- feature_9 <= 1753.50
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_9 >  1753.50
|   |   |   |   |   |--- class: 0
|   |   |   |--- feature_4 >  0.50
|   |   |   |   |--- feature_9 <= 19872.00
|   |   |   |   |   |--- feature_0 <= 399282.00
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_0 >  399282.00
|   |   |   |   |   |   |--- feature_11 <= 32.50
|   |   |   |   |   |   |   |--- feature_62 <= 0.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_62 >  0.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_11 >  32.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- feature_9 >  19872.00
|   |   |   |   |   |--- feature_13 <= 9198.00
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_13 >  9198.00
|   |   |   |   |   |   |--- feature_6 <= 441600.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- feature_6 >  441600.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |--- feature_10 >  157632.00
|   |   |   |--- class: 0

In [136]:
train_preds = optimized_clf.predict_proba(X_train)[:,1]
test_preds = optimized_clf.predict_proba(X_test)[:,1]
In [137]:
train_class_preds = optimized_clf.predict(X_train)
test_class_preds = optimized_clf.predict(X_test)
In [138]:
# Get the accuracy scores
train_accuracy_ = accuracy_score(train_class_preds,y_train)
test_accuracy_lr = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_lr)
print("The accuracy on test data is ", test_accuracy_lr)
The accuracy on train data is  0.7538250231577602
The accuracy on test data is  0.49957849685493805
In [139]:
#print the accuracy,precission,recall,f1,roc_score 
test_accuracy_lr= accuracy_score(test_class_preds,y_test)
test_precision_score_lr = precision_score(test_class_preds,y_test)
test_recall_score_lr = recall_score(test_class_preds,y_test)
test_f1_score_lr = f1_score(test_class_preds,y_test)
test_roc_score_lr = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_lr)
print("The precision on test data is ", test_precision_score_lr)
print("The recall on test data is ", test_recall_score_lr)
print("The f1 on test data is ", test_f1_score_lr)
print("The roc_score on test data is ", test_roc_score_lr)
The accuracy on test data is  0.49957849685493805
The precision on test data is  0.0006485084306095979
The recall on test data is  0.29411764705882354
The f1 on test data is  0.0012941633234114145
The roc_score on test data is  0.3969614462248156
In [140]:
cm_lr = confusion_matrix(y_train, train_class_preds)
print(cm_lr)
[[15636    17]
 [15646     8]]
In [141]:
labels = ['Not Defaulter', 'Defaulter']
ax= plt.subplot()
sns.heatmap(cm_lr, annot=True, ax = ax) #annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)
Out[141]:
[Text(0, 0.5, 'Not Defaulter'), Text(0, 1.5, 'Defaulter')]
In [142]:
len(optimal_rf_clf.feature_importances_)
Out[142]:
82
In [143]:
train_class_preds = optimal_rf_clf.predict(X_train)
test_class_preds = optimal_rf_clf.predict(X_test)
In [144]:
y_preds_proba_rf = optimal_rf_clf.predict_proba(X_test)[::,1]
In [145]:
import sklearn.metrics as metrics
y_pred_proba = y_preds_proba_rf
fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()
In [146]:
def G(v):
    bins = np.linspace(0., 100., 11)
    total = float(np.sum(v))
    yvals = []
    for b in bins:
        bin_vals = v[v <= np.percentile(v, b)]
        bin_fraction = (np.sum(bin_vals) / total) * 100.0
        yvals.append(bin_fraction)
    # perfect equality area
    pe_area = np.trapz(bins, x=bins)
    # lorenz area
    lorenz_area = np.trapz(yvals, x=bins)
    gini_val = (pe_area - lorenz_area) / float(pe_area)
    return bins, yvals, gini_val
bins, result, gini_val = G(y_preds_proba_rf)
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(bins, result, label="observed")
plt.plot(bins, bins, '--', label="perfect eq.")
plt.xlabel("fraction of population")
plt.ylabel("fraction of wealth")
plt.title("GINI: %.4f" %(gini_val))

                                                                                  
Out[146]:
Text(0.5, 1.0, 'GINI: 0.1057')
In [147]:
from sklearn.neighbors import KNeighborsClassifier 
In [148]:
credit_df_knn=credit_df_copy.copy()
In [149]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify = y)
In [150]:
from sklearn.preprocessing import StandardScaler 
scaler = StandardScaler() 
scaler.fit(X_train)
X_train = scaler.transform(X_train) 
X_test = scaler.transform(X_test)
In [151]:
from sklearn.neighbors import KNeighborsClassifier 
classifier = KNeighborsClassifier(n_neighbors=8) 
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test) 
In [152]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
result = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(result)
result1 = classification_report(y_test, y_pred)
print("Classification Report:",)
print (result1)
result2 = accuracy_score(y_test,y_pred) 
print("Accuracy:",result2)
Confusion Matrix:
[[6373 1338]
 [2328 5382]]
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.83      0.78      7711
           1       0.80      0.70      0.75      7710

    accuracy                           0.76     15421
   macro avg       0.77      0.76      0.76     15421
weighted avg       0.77      0.76      0.76     15421

Accuracy: 0.7622722261850723
In [153]:
train_preds = optimized_clf.predict_proba(X_train)[:,1]
test_preds = optimized_clf.predict_proba(X_test)[:,1]
In [154]:
train_class_preds = optimized_clf.predict(X_train)
test_class_preds = optimized_clf.predict(X_test)
In [155]:
# Get the accuracy scores
train_accuracy_ = accuracy_score(train_class_preds,y_train)
test_accuracy_lr = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_lr)
print("The accuracy on test data is ", test_accuracy_lr)
The accuracy on train data is  0.7538250231577602
The accuracy on test data is  0.7522858439789897
In [156]:
#print the accuracy,precission,recall,f1,roc_score 
test_accuracy_lr= accuracy_score(test_class_preds,y_test)
test_precision_score_lr = precision_score(test_class_preds,y_test)
test_recall_score_lr = recall_score(test_class_preds,y_test)
test_f1_score_lr = f1_score(test_class_preds,y_test)
test_roc_score_lr = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_lr)
print("The precision on test data is ", test_precision_score_lr)
print("The recall on test data is ", test_recall_score_lr)
print("The f1 on test data is ", test_f1_score_lr)
print("The roc_score on test data is ", test_roc_score_lr)
The accuracy on test data is  0.7522858439789897
The precision on test data is  0.6888456549935149
The recall on test data is  0.7889185977421271
The f1 on test data is  0.7354936989336657
The roc_score on test data is  0.7564111920693602
In [157]:
cm_lr = confusion_matrix(y_train, train_class_preds)
print(cm_lr)
[[12793  2860]
 [ 4852 10802]]
In [158]:
labels = ['Not Defaulter', 'Defaulter']
ax= plt.subplot()
sns.heatmap(cm_lr, annot=True, ax = ax) #annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)
Out[158]:
[Text(0, 0.5, 'Not Defaulter'), Text(0, 1.5, 'Defaulter')]
In [159]:
len(optimal_rf_clf.feature_importances_)
Out[159]:
82
In [160]:
train_class_preds = optimal_rf_clf.predict(X_train)
test_class_preds = optimal_rf_clf.predict(X_test)
In [161]:
y_preds_proba_rf = optimal_rf_clf.predict_proba(X_test)[::,1]
In [162]:
def G(v):
    bins = np.linspace(0., 100., 11)
    total = float(np.sum(v))
    yvals = []
    for b in bins:
        bin_vals = v[v <= np.percentile(v, b)]
        bin_fraction = (np.sum(bin_vals) / total) * 100.0
        yvals.append(bin_fraction)
    # perfect equality area
    pe_area = np.trapz(bins, x=bins)
    # lorenz area
    lorenz_area = np.trapz(yvals, x=bins)
    gini_val = (pe_area - lorenz_area) / float(pe_area)
    return bins, yvals, gini_val
bins, result, gini_val = G(y_preds_proba_rf)
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(bins, result, label="observed")
plt.plot(bins, bins, '--', label="perfect eq.")
plt.xlabel("fraction of population")
plt.ylabel("fraction of wealth")
plt.title("GINI: %.4f" %(gini_val))
Out[162]:
Text(0.5, 1.0, 'GINI: 0.3130')
In [163]:
import sklearn.metrics as metrics
y_pred_proba = y_preds_proba_rf
fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()
In [166]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify = y)

# Initialize Gradient Boosting classifier
gb = GradientBoostingClassifier()

# Train the model on the training set
gb.fit(X_train, y_train)

# Predict on the test set
y_pred = gb.predict(X_test)

# Calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
Accuracy: 0.7786784255236366
In [168]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
result = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(result)
result1 = classification_report(y_test, y_pred)
print("Classification Report:",)
print (result1)
result2 = accuracy_score(y_test,y_pred) 
print("Accuracy:",result2)
Confusion Matrix:
[[6367 1344]
 [2069 5641]]
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.83      0.79      7711
           1       0.81      0.73      0.77      7710

    accuracy                           0.78     15421
   macro avg       0.78      0.78      0.78     15421
weighted avg       0.78      0.78      0.78     15421

Accuracy: 0.7786784255236366
In [170]:
train_preds = optimized_clf.predict_proba(X_train)[:,1]
test_preds = optimized_clf.predict_proba(X_test)[:,1]
In [171]:
train_class_preds = optimized_clf.predict(X_train)
test_class_preds = optimized_clf.predict(X_test)
In [172]:
# Get the accuracy scores
train_accuracy_ = accuracy_score(train_class_preds,y_train)
test_accuracy_lr = accuracy_score(test_class_preds,y_test)

print("The accuracy on train data is ", train_accuracy_lr)
print("The accuracy on test data is ", test_accuracy_lr)
The accuracy on train data is  0.7538250231577602
The accuracy on test data is  0.49957849685493805
In [173]:
#print the accuracy,precission,recall,f1,roc_score 
test_accuracy_lr= accuracy_score(test_class_preds,y_test)
test_precision_score_lr = precision_score(test_class_preds,y_test)
test_recall_score_lr = recall_score(test_class_preds,y_test)
test_f1_score_lr = f1_score(test_class_preds,y_test)
test_roc_score_lr = roc_auc_score(test_class_preds,y_test)

print("The accuracy on test data is ", test_accuracy_lr)
print("The precision on test data is ", test_precision_score_lr)
print("The recall on test data is ", test_recall_score_lr)
print("The f1 on test data is ", test_f1_score_lr)
print("The roc_score on test data is ", test_roc_score_lr)
The accuracy on test data is  0.49957849685493805
The precision on test data is  0.0006485084306095979
The recall on test data is  0.29411764705882354
The f1 on test data is  0.0012941633234114145
The roc_score on test data is  0.3969614462248156
In [174]:
cm_lr = confusion_matrix(y_train, train_class_preds)
print(cm_lr)
[[15636    17]
 [15646     8]]
In [175]:
labels = ['Not Defaulter', 'Defaulter']
ax= plt.subplot()
sns.heatmap(cm_lr, annot=True, ax = ax) #annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)
Out[175]:
[Text(0, 0.5, 'Not Defaulter'), Text(0, 1.5, 'Defaulter')]
In [176]:
len(optimal_rf_clf.feature_importances_)
Out[176]:
82
In [177]:
train_class_preds = optimal_rf_clf.predict(X_train)
test_class_preds = optimal_rf_clf.predict(X_test)
In [178]:
y_preds_proba_rf = optimal_rf_clf.predict_proba(X_test)[::,1]
In [179]:
def G(v):
    bins = np.linspace(0., 100., 11)
    total = float(np.sum(v))
    yvals = []
    for b in bins:
        bin_vals = v[v <= np.percentile(v, b)]
        bin_fraction = (np.sum(bin_vals) / total) * 100.0
        yvals.append(bin_fraction)
    # perfect equality area
    pe_area = np.trapz(bins, x=bins)
    # lorenz area
    lorenz_area = np.trapz(yvals, x=bins)
    gini_val = (pe_area - lorenz_area) / float(pe_area)
    return bins, yvals, gini_val
bins, result, gini_val = G(y_preds_proba_rf)
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(bins, result, label="observed")
plt.plot(bins, bins, '--', label="perfect eq.")
plt.xlabel("fraction of population")
plt.ylabel("fraction of wealth")
plt.title("GINI: %.4f" %(gini_val))
Out[179]:
Text(0.5, 1.0, 'GINI: 0.1057')
In [180]:
import sklearn.metrics as metrics
y_pred_proba = y_preds_proba_rf
fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()
In [ ]: